% VisIO paper from IPDPS 2011. The IEEE export had put the month name in the
% year field; the year is taken from the proceedings title.
@inproceedings{Wang:VisIO,
  author    = {Mitchell, C. and Ahrens, J. and Wang, Jun},
  title     = {{VisIO}: Enabling Interactive Visualization of Ultra-Scale, Time Series Data via High-Bandwidth Distributed {I/O} Systems},
  booktitle = {2011 IEEE International Parallel and Distributed Processing Symposium (IPDPS)},
  year      = {2011},
  month     = may,
  pages     = {68--79},
  issn      = {1530-2075},
  keywords  = {application program interfaces;data visualisation;distributed processing;file organisation;input-output programs;message passing;time series;HDFS;I/O library;I/O subsystem;Lustre installation;MPI file system semantics;N-to-N reads;POSIX file system semantics;TACC Longhorn cluster;VisIO system;data partitioning rules;file formats;global ocean salinity simulation;hadoop distributed file system;high-bandwidth distributed I/O systems;interactive visualization;nonPOSIX distributed file system;parallel enabled readers;petascale simulations;shared-nothing approach;ultra-scale time series data;Computational modeling;Data visualization;Distributed databases;File systems;Libraries;Pipelines;Testing},
}

% Magazine article in USENIX ;login: (volume 38, number 3); typed as an
% article because it carries journal/volume/number and no booktitle.
@article{ref:probegarth,
  author  = {Gibson, Garth and Grider, Gary and Jacobson, Andree and Lloyd, Wyatt},
  title   = {{PRObE}: A Thousand-Node Experimental Cluster for Computer Systems Research},
  journal = {{USENIX} ;login:},
  volume  = {38},
  number  = {3},
  year    = {2013},
  month   = jun,
  url     = {https://www.usenix.org/publications/login/june-2013-volume-38-number-3/probe-thousand-node-experimental-cluster-computer},
}


% Chapter of The Visualization Handbook. The original export had split the
% page range 717--731 across the volume and pages fields.
@incollection{ahrens2005paraview,
  author    = {Ahrens, James and Geveci, Berk and Law, Charles},
  title     = {{ParaView}: An End-User Tool for Large Data Visualization},
  booktitle = {The Visualization Handbook},
  editor    = {Hansen, Charles D. and Johnson, Chris R.},
  pages     = {717--731},
  year      = {2005},
  publisher = {Elsevier},
}

% Journal article, IEEE TPDS volume 22, number 4 (April 2011).
@article{Lin:mpiBLAST-pio,
  author     = {Lin, Heshan and Ma, Xiaosong and Feng, Wu-chun and Samatova, Nagiza F.},
  title      = {Coordinating Computation and {I/O} in Massively Parallel Sequence Search},
  journal    = {IEEE Transactions on Parallel and Distributed Systems},
  volume     = {22},
  number     = {4},
  month      = apr,
  year       = {2011},
  pages      = {529--543},
  issn       = {1045-9219},
  publisher  = {IEEE Press},
  address    = {Piscataway, NJ, USA},
  issue_date = {April 2011},
  numpages   = {15},
  acmid      = {1957581},
  keywords   = {BLAST, scheduling, parallel I/O, bioinformatics, parallel genomic sequence search},
}


% HPDC '10 paper. Acronyms in the title are braced so sentence-casing styles
% do not lower-case them.
@inproceedings{MRAP,
  author    = {Sehrish, Saba and Mackey, Grant and Wang, Jun and Bent, John},
  title     = {{MRAP}: A Novel {MapReduce}-based Framework to Support {HPC} Analytics Applications with Access Patterns},
  booktitle = {Proceedings of the 19th ACM International Symposium on High Performance Distributed Computing},
  series    = {HPDC '10},
  year      = {2010},
  pages     = {107--118},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Chicago, Illinois},
  isbn      = {978-1-60558-942-8},
  doi       = {10.1145/1851476.1851490},
  url       = {http://doi.acm.org/10.1145/1851476.1851490},
  numpages  = {12},
  acmid     = {1851490},
  keywords  = {HPC analytics applications, HPC data access patterns, I/O performance of HPC applications, MapReduce, large-scale data processing systems},
}

% RDD paper from NSDI 2012. The export carried a placeholder page range
% (2--2); the range below is the one in the USENIX proceedings -- TODO confirm.
@inproceedings{Spark,
  author    = {Zaharia, Matei and Chowdhury, Mosharaf and Das, Tathagata and Dave, Ankur and Ma, Justin and McCauley, Murphy and Franklin, Michael and Shenker, Scott and Stoica, Ion},
  title     = {Resilient Distributed Datasets: A Fault-Tolerant Abstraction for In-Memory Cluster Computing},
  booktitle = {Proceedings of the 9th {USENIX} Conference on Networked Systems Design and Implementation},
  pages     = {15--28},
  year      = {2012},
  publisher = {USENIX Association},
}

% Dryad, cited from ACM SIGOPS Operating Systems Review 41(3).
@article{isard2007dryad,
  author    = {Isard, Michael and Budiu, Mihai and Yu, Yuan and Birrell, Andrew and Fetterly, Dennis},
  title     = {Dryad: distributed data-parallel programs from sequential building blocks},
  journal   = {ACM SIGOPS Operating Systems Review},
  year      = {2007},
  volume    = {41},
  number    = {3},
  pages     = {59--72},
  publisher = {ACM},
}

% Cited from ACM SIGOPS Operating Systems Review 37(5). Typed as an article
% because the venue, volume and number are journal data; standard styles
% warn when an inproceedings entry has both volume and number.
@article{ghemawat2003google,
  author    = {Ghemawat, Sanjay and Gobioff, Howard and Leung, Shun-Tak},
  title     = {The {Google} File System},
  journal   = {ACM SIGOPS Operating Systems Review},
  volume    = {37},
  number    = {5},
  pages     = {29--43},
  year      = {2003},
  publisher = {ACM},
}

% MapReduce article in Communications of the ACM 51(1).
@article{dean2008mapreduce,
  author    = {Dean, Jeffrey and Ghemawat, Sanjay},
  title     = {{MapReduce}: Simplified Data Processing on Large Clusters},
  journal   = {Communications of the ACM},
  volume    = {51},
  number    = {1},
  pages     = {107--113},
  year      = {2008},
  publisher = {ACM},
}

% ZOID paper from PPoPP 2008.
@inproceedings{IOforwarding,
  author    = {Iskra, Kamil and Romein, John W. and Yoshii, Kazutomo and Beckman, Pete},
  title     = {{ZOID}: {I/O}-Forwarding Infrastructure for Petascale Architectures},
  booktitle = {Proceedings of the 13th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
  pages     = {153--162},
  year      = {2008},
  publisher = {ACM},
}


% Lustre paper from the 2003 Linux Symposium. The export's volume field
% only repeated the year and has been dropped.
@inproceedings{schwan2003lustre,
  author    = {Schwan, Philip},
  title     = {Lustre: Building a File System for 1000-Node Clusters},
  booktitle = {Proceedings of the 2003 {Linux} Symposium},
  year      = {2003},
}

% Journal of Computer Science and Technology 22(5).
@article{sun2007server,
  author    = {Sun, Xian-He and Byna, Surendra and Chen, Yong},
  title     = {Server-based data push architecture for multi-processor environments},
  journal   = {Journal of Computer Science and Technology},
  year      = {2007},
  volume    = {22},
  number    = {5},
  pages     = {641--652},
  publisher = {Springer},
}

% HDFS architecture and design document.
@misc{Hadoop,
  author = {Borthakur, Dhruba},
  title  = {The {Hadoop} Distributed File System: Architecture and Design},
  year   = {2007},
}

% Mesos paper from NSDI 2011. The export carried a placeholder page range
% (22--22); the range below is the one in the USENIX proceedings -- TODO confirm.
@inproceedings{Mesos,
  author    = {Hindman, Benjamin and Konwinski, Andy and Zaharia, Matei and Ghodsi, Ali and Joseph, Anthony D. and Katz, Randy and Shenker, Scott and Stoica, Ion},
  title     = {{Mesos}: A Platform for Fine-Grained Resource Sharing in the Data Center},
  booktitle = {Proceedings of the 8th {USENIX} Conference on Networked Systems Design and Implementation},
  pages     = {295--308},
  year      = {2011},
  publisher = {USENIX Association},
}

% Brookhaven technical report on the Protein Data Bank.
@techreport{abola1984protein,
  author      = {Abola, Enrique E and Bernstein, Frances C and Koetzle, Thomas F},
  title       = {Protein data bank},
  institution = {Brookhaven National Lab., Upton, NY (USA)},
  year        = {1984},
}

% IEEE CLUSTER 2012 paper.
@inproceedings{chen2012decoupled,
  author       = {Chen, Yong and Chen, Chao and Sun, Xian-He and Gropp, William D and Thakur, Rajeev},
  title        = {A decoupled execution paradigm for data-intensive high-end computing},
  booktitle    = {Cluster Computing (CLUSTER), 2012 IEEE International Conference on},
  year         = {2012},
  pages        = {200--208},
  organization = {IEEE},
}


% PVFS paper. The leading "in" was removed from the booktitle because styles
% already prefix "In". The author list and page range follow the published
% proceedings rather than the truncated export -- TODO confirm.
@inproceedings{ross2000pvfs,
  author    = {Carns, Philip H. and Ligon, III, Walter B. and Ross, Robert B. and Thakur, Rajeev},
  title     = {{PVFS}: A Parallel File System for {Linux} Clusters},
  booktitle = {Proceedings of the 4th Annual {Linux} Showcase and Conference},
  pages     = {317--327},
  year      = {2000},
}

% RFC 1813. The RFC label and number live in type/number so styles print
% "RFC 1813, Network Working Group".
@techreport{callaghan1995nfs,
  author      = {Callaghan, Brent and Pawlowski, Brian and Staubach, Peter},
  title       = {{NFS} Version 3 Protocol Specification},
  type        = {RFC},
  number      = {1813},
  institution = {Network Working Group},
  year        = {1995},
}

% UCSF Chimera article, Journal of Computational Chemistry 25(13).
@article{pettersen2004Chimera,
  author    = {Pettersen, Eric F. and Goddard, Thomas D. and Huang, Conrad C. and Couch, Gregory S. and Greenblatt, Daniel M. and Meng, Elaine C. and Ferrin, Thomas E.},
  title     = {{UCSF Chimera}---A Visualization System for Exploratory Research and Analysis},
  journal   = {Journal of Computational Chemistry},
  volume    = {25},
  number    = {13},
  pages     = {1605--1612},
  year      = {2004},
  publisher = {Wiley Online Library},
}

% The ParaView user guide published by Kitware.
@book{ref:paraview,
  author    = {Squillacote, Amy Henderson},
  title     = {The {ParaView} Guide: A Parallel Visualization Application},
  year      = {2007},
  publisher = {Kitware},
}

% ACM Queue 3(3).
@article{ostell2005databases,
  author    = {Ostell, James},
  title     = {Databases of discovery},
  journal   = {Queue},
  year      = {2005},
  volume    = {3},
  number    = {3},
  pages     = {40--48},
  publisher = {ACM},
}

% Science 322(5901). The title uses an em-dash (---), not a range dash.
@article{2008genbank,
  author    = {Strasser, Bruno J.},
  title     = {{GenBank}---Natural History in the 21st Century?},
  journal   = {Science},
  volume    = {322},
  number    = {5901},
  pages     = {537--538},
  year      = {2008},
  publisher = {American Association for the Advancement of Science},
}

% GenBank database article, Nucleic Acids Research 38 (supplement 1).
@article{benson2010genbank,
  author    = {Benson, Dennis A. and Karsch-Mizrachi, Ilene and Lipman, David J. and Ostell, James and Sayers, Eric W.},
  title     = {{GenBank}},
  journal   = {Nucleic Acids Research},
  volume    = {38},
  number    = {suppl 1},
  pages     = {D46--D51},
  year      = {2010},
  publisher = {Oxford University Press},
}

% Roadrunner Universe project overview, Journal of Physics: Conference Series 180(1).
@article{Roadrunner,
  author   = {Salman Habib and Adrian Pope and Zarija Lukic and David Daniel and Patricia Fasel and Nehal Desai and Katrin Heitmann and Chung-Hsing Hsu and Lee Ankeny and Graham Mark and Suman Bhattacharya and James Ahrens},
  title    = {Hybrid Petacomputing Meets Cosmology: The {Roadrunner Universe} Project},
  journal  = {Journal of Physics: Conference Series},
  volume   = {180},
  number   = {1},
  pages    = {012019},
  year     = {2009},
  url      = {http://stacks.iop.org/1742-6596/180/i=1/a=012019},
  abstract = {The target of the Roadrunner Universe project at Los Alamos National Laboratory is a set of very large cosmological N-body simulation runs on the hybrid supercomputer Roadrunner, the world's first petaflop platform. Roadrunner's architecture presents opportunities and difficulties characteristic of next-generation supercomputing. We describe a new code designed to optimize performance and scalability by explicitly matching the underlying algorithms to the machine architecture, and by using the physics of the problem as an essential aid in this process. While applications will differ in specific exploits, we believe that such a design process will become increasingly important in the future. The Roadrunner Universe project code, MC 3 (Mesh-based Cosmology Code on the Cell), uses grid and direct particle methods to balance the capabilities of Roadrunner's conventional (Opteron) and accelerator (Cell BE) layers. Mirrored particle caches and spectral techniques are used to overcome communication bandwidth limitations and possible difficulties with complicated particle-grid interaction templates.},
}


% IPDPS 2011 workshops (IPDPSW) paper.
@inproceedings{MR_MPI,
  author    = {Sul, Seung-Jin and Tovchigrechko, A.},
  title     = {Parallelizing {BLAST} and {SOM} Algorithms with {MapReduce-MPI} Library},
  booktitle = {Parallel and Distributed Processing Workshops and PhD Forum (IPDPSW), 2011 IEEE International Symposium on},
  year      = {2011},
  pages     = {481--489},
  issn      = {1530-2075},
  keywords  = {C++ language;bioinformatics;learning (artificial intelligence);message passing;parallel processing;public domain software;self-organising feature maps;BLAST;HPC;HTC;MapReduce-MPI Library;NCBI C++ Toolkit;NSF Tera Grid;SOM algorithm;bioinformatics algorithm;file system level;global synchronization;high performance computing;high-throughput computing;machine-learning algorithm;open-source bioinformatics application;self-organizing map;Bioinformatics;Clustering algorithms;Computer architecture;Databases;File systems;Libraries;Parallel processing},
}

% IPDPS 2013 paper on the PDLA replication scheme.
@inproceedings{sun,
  author    = {Yin, Yanlong and Li, Jibing and He, Jun and Sun, Xian-He and Thakur, R.},
  title     = {Pattern-Direct and Layout-Aware Replication Scheme for Parallel {I/O} Systems},
  booktitle = {2013 IEEE 27th International Symposium on Parallel and Distributed Processing (IPDPS)},
  year      = {2013},
  pages     = {345--356},
  issn      = {1530-2075},
  keywords  = {input-output programs;parallel processing;HPC;PDLA data replication scheme;access cost analysis;data access pattern;high performance computing;optimized data layouts;parallel I/O systems;pattern-direct and layout-aware data replication scheme;Computational modeling;Data models;Layout;Optimization;Prototypes;Runtime;System analysis and design;I/O optimization;Parallel I/O;data access pattern;data reorganization;data replication},
}

% IPDPS 2013 paper on the FlexIO middleware.
@inproceedings{FlexIO,
  author    = {Zheng, Fang and Zou, Hongbo and Eisenhauer, G. and Schwan, K. and Wolf, M. and Dayal, J. and Nguyen, Tuan-Anh and Cao, Jianting and Abbasi, H. and Klasky, S. and Podhorszki, N. and Yu, Hongfeng},
  title     = {{FlexIO}: {I/O} Middleware for Location-Flexible Scientific Data Analytics},
  booktitle = {2013 IEEE 27th International Symposium on Parallel and Distributed Processing (IPDPS)},
  year      = {2013},
  pages     = {320--331},
  issn      = {1530-2075},
  keywords  = {data analysis;input-output programs;middleware;parallel machines;storage management;FlexIO middleware;I/O bottleneck;I/O hierarchy;I/O path;data storage;disk;diverse data movement method;dynamic data manipulation functionality deployment;flexible data placement policy;high end computing machine;location flexible scientific data analytics;online simulation data processing;performing analytics;persistent storage;Analytical models;Arrays;Computational modeling;Data models;Monitoring;Runtime;Software;Flexibility;I/O;In Situ Data Analytics;Placement},
}

% IPDPS 2013 paper on transparent collective I/O (TCIO).
@inproceedings{ICIO,
  author    = {Yu, Yongen and Wu, Jingjin and Lan, Zhiling and Rudd, D. H. and Gnedin, N. Y. and Kravtsov, A.},
  title     = {A Transparent Collective {I/O} Implementation},
  booktitle = {2013 IEEE 27th International Symposium on Parallel and Distributed Processing (IPDPS)},
  year      = {2013},
  pages     = {297--307},
  issn      = {1530-2075},
  keywords  = {application program interfaces;optimising compilers;parallel programming;software libraries;HPC application;MPI-IO;TCIO;application development;complex data access;dynamic data sizes;file view mechanism;noncontiguous access patterns;programming efforts;real cosmology application;single collective I/O access;transparent collective I/O optimization;user-level library;Arrays;File systems;Layout;Libraries;Optimization;Programming;Synchronization;Collective I/O;HPC;I/O intensive applications;MPI;One-sided communication;Parallel I/O;Transparent Collective I/O},
}

% IPDPS '13 paper on iBridge.
@inproceedings{iBridge,
  author    = {Zhang, Xuechen and Liu, Ke and Davis, Kei and Jiang, Song},
  title     = {{iBridge}: Improving Unaligned Parallel File Access with Solid-State Drives},
  booktitle = {Proceedings of the 2013 IEEE 27th International Symposium on Parallel and Distributed Processing},
  series    = {IPDPS '13},
  year      = {2013},
  pages     = {381--392},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  isbn      = {978-0-7695-4971-2},
  numpages  = {12},
  acmid     = {2511353},
  keywords  = {Solid state drive, parallel file systems, parallel I/O},
}

% IPDPS 2013 conference paper. It was exported as a journal article with a
% placeholder volume of 0, so it is retyped and the volume dropped.
@inproceedings{Disk-Cache,
  author    = {Prabhakar, Ramya and Kandemir, Mahmut and Jung, Myoungsoo},
  title     = {Disk-Cache and Parallelism Aware {I/O} Scheduling to Improve Storage System Performance},
  booktitle = {2013 IEEE 27th International Symposium on Parallel and Distributed Processing (IPDPS)},
  year      = {2013},
  pages     = {357--368},
  issn      = {1530-2075},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
}

% IPDPS 2013 paper. The year is aligned with the proceedings named in the
% booktitle, and the organization field that duplicated the publisher is dropped.
@inproceedings{VisualNetworkIO,
  author    = {Sigovan, C. and Muelder, C. and Ma, K. and Cope, J. and Iskra, K. and Ross, Robert B.},
  title     = {A Visual Network Analysis Method for Large Scale Parallel {I/O} Systems},
  booktitle = {International Parallel and Distributed Processing Symposium (IPDPS 2013)},
  year      = {2013},
  publisher = {IEEE},
}

% Original BLAST paper, Journal of Molecular Biology 215(3). The five names
% below are the full author list, so "and others" is not needed.
@article{BLAST,
  author    = {Altschul, Stephen F. and Gish, Warren and Miller, Webb and Myers, Eugene W. and Lipman, David J.},
  title     = {Basic Local Alignment Search Tool},
  journal   = {Journal of Molecular Biology},
  volume    = {215},
  number    = {3},
  pages     = {403--410},
  year      = {1990},
  publisher = {Elsevier Science},
}

% Mathematics of Operations Research 1(2).
@article{garey1976complexity,
  author    = {Garey, Michael R. and Johnson, David S. and Sethi, Ravi},
  title     = {The Complexity of Flowshop and Jobshop Scheduling},
  journal   = {Mathematics of Operations Research},
  volume    = {1},
  number    = {2},
  pages     = {117--129},
  year      = {1976},
  publisher = {INFORMS},
}

% BMEI 2011 paper (volume 4 of the proceedings).
@inproceedings{bCloudBLAST,
  author    = {Meng, Zhen and Li, Jianhui and Zhou, Yunchun and Liu, Qi and Liu, Yong and Cao, Wei},
  title     = {{bCloudBLAST}: An Efficient {MapReduce} Program for Bioinformatics Applications},
  booktitle = {Biomedical Engineering and Informatics (BMEI), 2011 4th International Conference on},
  year      = {2011},
  volume    = {4},
  pages     = {2072--2076},
  keywords  = {DNA;Java;Linux;bioinformatics;molecular biophysics;molecular configurations;parallel databases;proteins;query processing;DNA databases;Hadoop libraries;Java;Linux;MapReduce program;NacOS systems;UNIX;Windows;bCloudBLAST;bioinformatics;input query sequence files;parallel implementation;protein databases;sequence databases;sequence similarities;Bioinformatics;Computer architecture;Databases;Phylogeny;Protein sequence;Virtual machining;BLAST;Bioinformatics;Cloud computing;MapReduce;bCloudBLAST},
}

% IPCCC 2006 paper. The export gave pages as "10 pp.-340" (a ten-page paper
% ending on page 340), which is written out here as an explicit range.
@inproceedings{Grider:PaScal,
  author    = {Grider, G. and Chen, H. and Nunez, J. and Poole, S. and Wacha, R. and Fields, P. and Martinez, R. and Martinez, P. and Khalsa, S. and Matthews, A. and Gibson, G.},
  title     = {{PaScal} -- A New Parallel and Scalable Server {IO} Networking Infrastructure for Supporting Global Storage/File Systems in Large-Size {Linux} Clusters},
  booktitle = {Performance, Computing, and Communications Conference, 2006. IPCCC 2006. 25th IEEE International},
  year      = {2006},
  pages     = {331--340},
  keywords  = {IP networks;Linux;file servers;resource allocation;routing protocols;workstation clusters;IO networking infrastructure;IP based global storage system;IPC computing;Linux cluster;PaScal;gigabit Ethernet;global storage-file system;inter-process communication;load balancing;multilevel switch-fabric interconnection network;multipath routing;open-standard IP network;parallel and scalable server;Bandwidth;Communication switching;Computer networks;File servers;File systems;Large-scale systems;Linux;Multiprocessor interconnection networks;Network servers;Routing},
}


% SC '12 paper (article number 102).
@inproceedings{Avron:shared-memory,
  title     = {Managing data-movement for effective shared-memory parallelization of out-of-core sparse solvers},
  author    = {Avron, Haim and Gupta, Anshul},
  booktitle = {Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis},
  series    = {SC '12},
  location  = {Salt Lake City, Utah},
  year      = {2012},
  articleno = {102},
  pages     = {102:1--102:11},
  numpages  = {11},
  isbn      = {978-1-4673-0804-5},
  acmid     = {2389134},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
}

% MPI reference book from MIT Press.
@book{MPI,
  author    = {Snir, Marc and Otto, Steve W. and Walker, David W. and Dongarra, Jack and Huss-Lederman, Steven},
  title     = {{MPI}: The Complete Reference},
  year      = {1995},
  publisher = {MIT Press},
}

% Smith--Waterman paper. The citation key is kept as-is, typo included, so
% existing citations still resolve. Title and year typos are fixed; volume
% and number follow the published record (J. Mol. Biol. 147(1)) -- TODO confirm.
@article{Ientification,
  author    = {Smith, Temple F. and Waterman, Michael S.},
  title     = {Identification of Common Molecular Subsequences},
  journal   = {Journal of Molecular Biology},
  volume    = {147},
  number    = {1},
  pages     = {195--197},
  year      = {1981},
  publisher = {Elsevier Science},
}

% Needleman--Wunsch paper, Journal of Molecular Biology 48(3). It has two
% authors, so the exported "and others" is not needed.
@article{needleman1970general,
  author    = {Needleman, Saul B. and Wunsch, Christian D.},
  title     = {A General Method Applicable to the Search for Similarities in the Amino Acid Sequence of Two Proteins},
  journal   = {Journal of Molecular Biology},
  volume    = {48},
  number    = {3},
  pages     = {443--453},
  year      = {1970},
  publisher = {Elsevier Science},
}

% Proceedings of the National Academy of Sciences 85(8).
@article{pearson1988improved,
  author    = {Pearson, William R and Lipman, David J},
  title     = {Improved tools for biological sequence comparison},
  journal   = {Proceedings of the National Academy of Sciences},
  year      = {1988},
  volume    = {85},
  number    = {8},
  pages     = {2444--2448},
  publisher = {National Acad Sciences},
}

% ClusterWorld 2003 proceedings paper. It was exported as a journal article
% whose volume field only repeated the year.
@inproceedings{mpiBLAST:design,
  author    = {Darling, Aaron and Carey, Lucas and Feng, Wu-chun},
  title     = {The Design, Implementation, and Evaluation of {mpiBLAST}},
  booktitle = {Proceedings of {ClusterWorld}},
  year      = {2003},
}

% Future Generation Computer Systems 17(6). Initials are separated so each
% one is parsed as its own given name.
@article{BLAST:parallelization,
  author    = {Braun, R. C. and Pedretti, Kevin T. and Casavant, Thomas L. and Scheetz, Todd E. and Birkett, C. L. and Roberts, Chad A.},
  title     = {Parallelization of Local {BLAST} Service on Workstation Clusters},
  journal   = {Future Generation Computer Systems},
  volume    = {17},
  number    = {6},
  pages     = {745--754},
  year      = {2001},
  publisher = {Elsevier},
}

% Gapped BLAST / PSI-BLAST paper, Nucleic Acids Research 25(17).
@article{BLAST:PSI-BLAST,
  author    = {Altschul, Stephen F. and Madden, Thomas L. and Sch{\"a}ffer, Alejandro A. and Zhang, Jinghui and Zhang, Zheng and Miller, Webb and Lipman, David J.},
  title     = {Gapped {BLAST} and {PSI-BLAST}: A New Generation of Protein Database Search Programs},
  journal   = {Nucleic Acids Research},
  volume    = {25},
  number    = {17},
  pages     = {3389--3402},
  year      = {1997},
  publisher = {Oxford University Press},
}

% IEEE TPDS 17(8). The journal name is written in natural order rather than
% the inverted IEEE export form.
@article{ScalaBLAST,
  author   = {Oehmen, C. and Nieplocha, Jarek},
  title    = {{ScalaBLAST}: A Scalable Implementation of {BLAST} for High-Performance Data-Intensive Bioinformatics Analysis},
  journal  = {IEEE Transactions on Parallel and Distributed Systems},
  year     = {2006},
  volume   = {17},
  number   = {8},
  pages    = {740--749},
  issn     = {1045-9219},
  keywords = {DNA;biology computing;distributed shared memory systems;genetics;proteins;sequences;storage management;very large databases;DNA;ScalaBLAST;bacterial genome;data prefetching;distributed memory;genetic code;high-performance data-intensive bioinformatics analysis;latency hiding;mammalian genome;multilevel parallelism;parallel I/O;protein components;protein information;sequence alignment;sequence matching problem;shared memory architecture;task scheduling;very large databases;Assembly;Bioinformatics;DNA;Data analysis;Databases;Genetics;Genomics;Microorganisms;Proteins;Sequences;BLAST;Global Arrays.;High-performance sequence alignment},
}

% The Journal of Supercomputing 10(2). A space is restored between given
% name and middle initial so names parse correctly.
@article{Globalarrays,
  author    = {Nieplocha, Jaroslaw and Harrison, Robert J. and Littlefield, Richard J.},
  title     = {Global Arrays: A Nonuniform Memory Access Programming Model for High-Performance Computers},
  journal   = {The Journal of Supercomputing},
  volume    = {10},
  number    = {2},
  pages     = {169--189},
  year      = {1996},
  issn      = {0920-8542},
  publisher = {Kluwer Academic Publishers},
  language  = {English},
  keywords  = {NUMA architecture; parallel programming models; shared memory; parallel programming environments; distributed arrays; global arrays; one-sided communication; scientific computing; Grand Challenges; computational chemistry},
}

% SC '08 paper (article number 35).
@inproceedings{ParallelApproach,
  title     = {An efficient parallel approach for identifying protein families in large-scale metagenomic data sets},
  author    = {Wu, Changjun and Kalyanaraman, Ananth},
  booktitle = {Proceedings of the 2008 ACM/IEEE conference on Supercomputing},
  series    = {SC '08},
  location  = {Austin, Texas},
  year      = {2008},
  articleno = {35},
  pages     = {35:1--35:10},
  numpages  = {10},
  isbn      = {978-1-4244-2835-9},
  acmid     = {1413406},
  publisher = {IEEE Press},
  address   = {Piscataway, NJ, USA},
}

% ECC'11 (European Computing Conference) paper published by WSEAS.
@inproceedings{NucleotideAlignment,
  author    = {Borovska, Plamenka and Gancheva, Veska and Markov, Stoyan},
  title     = {Parallel Performance Evaluation of Sequence Nucleotide Alignment on the Supercomputer {BlueGene/P}},
  booktitle = {Proceedings of the 5th European Computing Conference},
  series    = {ECC'11},
  year      = {2011},
  pages     = {462--467},
  publisher = {World Scientific and Engineering Academy and Society (WSEAS)},
  address   = {Stevens Point, Wisconsin, USA},
  location  = {Paris, France},
  isbn      = {978-960-474-297-4},
  numpages  = {6},
  acmid     = {1991094},
  keywords  = {biocomputing, high performance computing, human genome, influenza virus, mpiBLAST, parallel performance, sequences alignment},
}

% SC '12 paper (article number 49).
@inproceedings{In-situProcessing,
  title     = {Combining in-situ and in-transit processing to enable extreme-scale scientific analysis},
  author    = {Bennett, Janine C. and Abbasi, Hasan and Bremer, Peer-Timo and Grout, Ray and Gyulassy, Attila and Jin, Tong and Klasky, Scott and Kolla, Hemanth and Parashar, Manish and Pascucci, Valerio and Pebay, Philippe and Thompson, David and Yu, Hongfeng and Zhang, Fan and Chen, Jacqueline},
  booktitle = {Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis},
  series    = {SC '12},
  location  = {Salt Lake City, Utah},
  year      = {2012},
  articleno = {49},
  pages     = {49:1--49:9},
  numpages  = {9},
  isbn      = {978-1-4673-0804-5},
  acmid     = {2389063},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
}

% SC 2012 paper.
@inproceedings{DataManagement,
  author    = {Zhang, Zhao and Katz, Daniel S. and Wozniak, Justin M. and Espinosa, Allan and Foster, Ian},
  title     = {Design and Analysis of Data Management in Scalable Parallel Scripting},
  booktitle = {High Performance Computing, Networking, Storage and Analysis (SC), 2012 International Conference for},
  year      = {2012},
  pages     = {1--11},
  issn      = {2167-4329},
  keywords  = {Computer architecture;Computers;Databases;Engines;Optimization;Runtime;Servers},
}


% eScience 2008 paper. The IEEE export had put the month in the year field;
% the year is taken from the proceedings title.
@inproceedings{CloudBLAST,
  author    = {Matsunaga, A. and Tsugawa, M. and Fortes, J.},
  title     = {{CloudBLAST}: Combining {MapReduce} and Virtualization on Distributed Resources for Bioinformatics Applications},
  booktitle = {eScience, 2008. eScience '08. IEEE Fourth International Conference on},
  year      = {2008},
  month     = dec,
  pages     = {222--229},
  keywords  = {application program interfaces;bioinformatics;message passing;virtual machines;wide area networks;CloudBLAST;MPI-based solution;MapReduce;WAN-based test bed;bioinformatics applications;bioinformatics tool;computing resources;distributed computing;distributed resources;message passing interface;network virtualization;virtual machines;virtual network technology;wide area networks;Application virtualization;Bioinformatics;Computer network management;Distributed computing;Environmental management;Resource management;Resource virtualization;Technology management;Testing;Virtual machining;Cloud computing;bioinformatics;mapreduce;virtualization},
}

% IPDPS 2005 paper. The IEEE export had put the month in the year field;
% the year is taken from the proceedings title. The paper occupies the
% single page label 72b.
@inproceedings{lin:EfficientDataAccess,
  author    = {Lin, Heshan and Ma, Xiaosong and Chandramohan, P. and Geist, A. and Samatova, N.},
  title     = {Efficient Data Access for Parallel {BLAST}},
  booktitle = {Parallel and Distributed Processing Symposium, 2005. Proceedings. 19th IEEE International},
  year      = {2005},
  month     = apr,
  pages     = {72b},
  keywords  = {biology computing;data handling;message passing;parallel programming;public domain software;query processing;bioinformatics;biological sequence database;biological sequence search application;caching;computation-intensive alignment;computational biology;data access;data handling;data management overhead;database partitioning;mpiBLAST scalability;open-source parallel BLAST tool;optimization;parallel sequence search application;parallel sequence search tool;sequence alignment engine;Bioinformatics;Biology computing;Computational biology;Concurrent computing;Data handling;Databases;Engines;Merging;Sequences;Supercomputers},
}

% SC '06 paper (article number 104). The last author's surname is spelled
% as in the other entries of this file.
@inproceedings{sequence-searching:ad-hoc,
  author    = {Gardner, Mark K. and Feng, Wu-chun and Archuleta, Jeremy and Lin, Heshan and Ma, Xiaosong},
  title     = {Parallel Genomic Sequence-Searching on an Ad-Hoc Grid: Experiences, Lessons Learned, and Implications},
  booktitle = {Proceedings of the 2006 ACM/IEEE Conference on Supercomputing},
  series    = {SC '06},
  year      = {2006},
  articleno = {104},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Tampa, Florida},
  isbn      = {0-7695-2700-0},
  acmid     = {1188564},
  keywords  = {BLAST, agile development, bioinformatics, cluster computing, fault tolerance, grid computing, optical networking, scalability, scheduling, scripting, sequence search},
}
% Web reference for the 1000 Genomes data hosted on AWS. The original title
% was truncated to "genomes:"; the wording below is reconstructed from the
% URL -- TODO confirm against the page title.
@misc{ref:genedata,
  title        = {{1000 Genomes} Project},
  howpublished = {http://aws.amazon.com/1000genomes/},
}

% Web reference for VTK.
@misc{ref:vtk,
  title        = {{VTK} Readers},
  howpublished = {http://www.vtk.org/},
}

% Web reference for the Marmot cluster wiki page.
@misc{ref:Marmot,
  howpublished = {https://www.nmc-probe.org/wiki/Marmot:Nodes},
  title        = {Marmot},
}

% Web reference for FUSE. The original title stopped at the colon; the
% expansion of the acronym is supplied.
@misc{ref:fusehdfs,
  title        = {{FUSE}: Filesystem in Userspace},
  howpublished = {http://fuse.sourceforge.net/},
}

% Web reference for the Genomes to Life proposal document.
@misc{ref:Genomesproject,
  howpublished = {www.genomes2life.org/SNL-ORNL-GTL-Proposal.doc},
  title        = {Genomes to Life project proposal},
}

% Web tutorial on Hadoop-Blast.
@misc{ref:Hadoop-Blast,
  title        = {Running {Hadoop-Blast} in Distributed {Hadoop}},
  howpublished = {http://salsahpc.indiana.edu/tutorial/hadoopblast.html},
}

% Web reference for the Genomes to Life proposal; same document as the
% ref:Genomesproject entry. Editor-template placeholder fields (bullet
% values, including the year) are removed; the publication year is not
% recorded in this file. Typed as misc with howpublished, matching the
% other web references here, so classic BibTeX styles print the address.
@misc{Genomesproject,
  author       = {Heffelfinger, G. and others},
  title        = {{Genomes to Life} Project Proposal},
  howpublished = {www.genomes2life.org/SNL-ORNL-GTL-Proposal.doc},
}

% HPDC '10 paper on AzureBlast.
@inproceedings{Lu:AzureBlast,
  author    = {Lu, Wei and Jackson, Jared and Barga, Roger},
  title     = {{AzureBlast}: A Case Study of Developing Science Applications on the Cloud},
  booktitle = {Proceedings of the 19th ACM International Symposium on High Performance Distributed Computing},
  series    = {HPDC '10},
  year      = {2010},
  pages     = {413--420},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Chicago, Illinois},
  isbn      = {978-1-60558-942-8},
  numpages  = {8},
  acmid     = {1851537},
  keywords  = {BLAST, Windows Azure, cloud computing},
}




